notebook.community

Edit and run



In [ ]:

    
import tweepy
import sys
import jsonpickle
import os



In [ ]:

    
# Make progress messages show up promptly.
# The old `os.fdopen(sys.stdout.fileno(), 'w', 0)` idiom only works on Python 2:
# Python 3 refuses unbuffered text streams (ValueError), and replacing
# sys.stdout would also detach output from a notebook cell. Line buffering on
# the existing stream gives the same effect where it is supported.
try:
    sys.stdout.reconfigure(line_buffering=True)
except (AttributeError, ValueError):
    # Stream has no reconfigure() (Python 2, some notebook/captured streams)
    # or does not support it; keep the stream as it is.
    pass

# API and ACCESS KEYS
# Read from the environment so real secrets never get saved in the notebook;
# the placeholders are only a fallback and must be replaced before running.
API_KEY = os.environ.get('TWITTER_API_KEY', '<TWITTER_API_KEY>')  # Change me
API_SECRET = os.environ.get('TWITTER_API_SECRET', '<TWITTER_API_SECRET>')  # Change me

searchQuery = 'bash OR bashbleed OR shellshock OR cve-2014-6271'
maxTweets = 500000  # Upper bound on the number of tweets to download
tweetsPerQry = 100  # Max Allowed per Query
fName = 'shellshockTweets.txt'  # Output file, one JSON document per line



In [ ]:

    
auth = tweepy.AppAuthHandler(API_KEY, API_SECRET)

# We make the client wait in case we exceed our rate of 450 queries / 15 min.
# So this can take a while before all tweets are fetched.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

if (not api):
    print ("Can't Authenticate Check Creds!")
    sys.exit(-1)

# Paging state: `last_id` is the id of the oldest tweet written so far
# (-1 means "no page fetched yet"); `tweetCount` is the running total.
last_id = -1
tweetCount = 0
print("Downloading max {0} tweets".format(maxTweets))
with open(fName, 'w') as f:
    while tweetCount < maxTweets:
        # Never ask for more than is still needed, so the total cannot
        # overshoot maxTweets when it is not a multiple of tweetsPerQry.
        search_args = {
            'q': searchQuery,
            'count': min(tweetsPerQry, maxTweets - tweetCount),
        }
        if last_id > 0:
            # Page backwards: only tweets strictly older than the last one seen.
            search_args['max_id'] = str(last_id - 1)

        try:
            new_tweets = api.search(**search_args)
        except tweepy.TweepError as e:
            # Stop on the first API error; everything written so far is kept.
            print("some error : " + str(e))
            break

        if not new_tweets:
            print("No more tweets found")
            break

        # One JSON document per line, built from the raw API payload.
        for tweet in new_tweets:
            f.write(jsonpickle.encode(tweet._json, unpicklable=False) + '\n')
        tweetCount += len(new_tweets)
        print("Downloaded {0} tweets".format(tweetCount))
        last_id = new_tweets[-1].id

# The `with` block has already closed the file; no explicit close is needed.
print ("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fName))

POST ACTION

After the file has been downloaded, compress it by running `gzip shellshockTweets.txt`.

TODO: Automate the gzip step.